{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipy.spatial.distance as ssd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 用户特征向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "r1 = np.array([1,4,2,1])\n",
    "r2 = np.array([2,4,2,1])\n",
    "r3 = np.array([5,1,5,4])\n",
    "r4 = np.array([2,5,3,4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# person系数\n",
    "\n",
    "def sim(v1, v2):\n",
    "    # 用户1和用户2 的平均打分\n",
    "    v1_mu= sum(v1) / len(v1)\n",
    "    v2_mu= sum(v2) / len(v2)\n",
    "    # 特征向量取均值\n",
    "    v1_ = v1 - v1_sum\n",
    "    v2_ = v2 - v2_sum\n",
    "    # 返回相关系数\n",
    "    return 1-ssd.cosine(v1_, v2_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* itemcf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item1</th>\n",
       "      <th>item2</th>\n",
       "      <th>item3</th>\n",
       "      <th>item4</th>\n",
       "      <th>item5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>user1</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user3</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user4</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       item1  item2  item3  item4  item5\n",
       "user1      1      4      2      1      0\n",
       "user2      2      4      2      1      5\n",
       "user3      5      1      5      4      2\n",
       "user4      2      5      3      4      5"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_data = pd.read_csv('C:/Users/Administrator/Desktop/test_data1.csv', sep=',')\n",
    "df_data.index = ['user1', 'user2', 'user3', 'user4']\n",
    "df_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 计算user2-user4的平均值向量.loc是根据Index来索引对应的行，从而计算用户对这5件商品的平均打分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2.8, 3.4, 3.8]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_mu = [\n",
    "    sum(df_data.loc[index]) / len(df_data.loc[index])\n",
    "    for index in df_data.iloc[1:4].index\n",
    "]\n",
    "user_mu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.84561791,  0.88176419, -0.88176419, -0.83262711])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 相关系数\n",
    "def sim(v1, v2):\n",
    "    v1_ = v1 - user_mu\n",
    "    v2_ = v2 - user_mu\n",
    "    return 1 - ssd.cosine(v1_, v2_)\n",
    "# 得到item1 - item5 的相关系数向量\n",
    "sim_v = np.array(\n",
    "    [sim(df_data['item5'][1:4],df_data[item][1:4])\n",
    "    for item in ['item1', 'item2', 'item3', 'item4']\n",
    "    ]\n",
    ")\n",
    "sim_v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "计算后的预测分数是： 0.024778902847947197\n"
     ]
    }
   ],
   "source": [
    "# 预测打分\n",
    "predict_rating = np.dot(sim_v, df_data.iloc[0][0:4]) /  sum(np.abs(sim_v))\n",
    "print('计算后的预测分数是：',predict_rating)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "最终预测打分： 0.024778902847947197\n"
     ]
    }
   ],
   "source": [
    "# predict_rating = np.clip(predict_rating, 05)\n",
    "print('最终预测打分：',predict_rating )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
